// cd /projects/hsieh_project/proj_202110/code_2_202011/
// qstata mkt_d_sect_sum.do &

set linesize 255
capture log close
log using /projects/hsieh_project/proj_202110/code_2_202011/mkt_d_sect_sum_log, replace text

clear all
cd /projects/

di "$S_TIME $S_DATE"

global rev_date: display %tdYYNNDD date("$S_DATE", "DMY")
di "${rev_date}"

global dir_proj "/projects/hsieh_project/proj_202110"

global dir_do "${dir_proj}/code_2_202011"
global dir_data "${dir_proj}/data"
global dir_out "${dir_proj}/output/202011_main"
noi capture mkdir ${dir_out}
global dir_outf "${dir_out}/mkt_d_sect_sum"
noi capture mkdir ${dir_outf}
noi capture mkdir ${dir_outf}/data/

global year1 = 1977
global year2 = 2017

global gl_perc = "10"
local l_perc = "10"

local decile = "10 20 30 40 50 60 70 80 90 100"

do "/projects/hsieh_project/code_0_general/f_rounding.do"
do "/projects/hsieh_project/code_0_general/f_reg.do"

global ds_ind "${dir_data}/ind_sum_all"

global lvmkt = "est msa1983 fips zipcode"

tempfile ds_temp
global ds_temp = "`ds_temp'"

//==============================================================================
// Generate Sato-Vartia weight by sector

use ${ds_ind}, clear

keep if inlist(year,${year1},${year2})
keep year sector ch_ind12 emp_ind

sort ch_ind12 year

gen ln_emp_ind = ln(emp_ind)
replace emp_ind = - emp_ind if year == ${year1}
replace ln_emp_ind = - ln_emp_ind if year == ${year1}

sort sector ch_ind12 year
collapse (first) emp_ind_y1=emp_ind (sum) emp_ind ln_emp_ind, by(sector ch_ind12)
gen w_num = emp_ind / ln_emp_ind
replace w_num = -emp_ind_y1 if emp_ind == 0
by sector: egen w_den = total(w_num)
gen w_sv_sect = w_num / w_den

save ${dir_outf}/sv_sector.dta, replace

//==============================================================================
// Calculate change in number of markets and aggregate to sector level

global xlsx_tab "${dir_outf}/mkt_d_by_sect.xlsx"

do ${dir_do}/mkt_d_ind_0.do "geo"
drop *_10 *_1 *_10b

*Dropping decile level measures
foreach dec in `decile' {
	capture noi drop *_`dec' 
	capture noi drop *_`dec'd
}

merge 1:1 sector ch_ind12 using ${dir_outf}/sv_sector, assert(match) nogen keepusing(w_sv_sect)

/*
foreach vmkt in $lvmkt {
	replace mu_mkt_`vmkt'_d = mu_mkt_`vmkt'_d * w_sv_sect
	replace ln_mu_mkt_`vmkt'_d = ln_mu_mkt_`vmkt'_d * w_sv_sect
}
collapse (sum) mu_mkt_* ln_mu_mkt_*, by(sector)
*/

foreach vmkt in $lvmkt {
	gen sd_ln_mu_`vmkt'_d = ln_mu_mkt_`vmkt'_d
}

/* Regressions of change in ln(markets / firm) on sector dummies */
reg ln_mu_mkt_est_d i.sector [aw = w_sv]
outreg2 using "${dir_outf}/reg_chg_ln_est_sector_dummy.doc", word replace

reg ln_mu_mkt_msa1983_d i.sector [aw = w_sv]
outreg2 using "${dir_outf}/reg_chg_ln_msa1983_sector_dummy.doc", word replace

*Back to means/SD by sector 
collapse (mean) ln_mu_mkt_* mu_mkt_* (sd) sd* (count) count_ind = ch_ind12 [aw = w_sv_sect], by (sector)


foreach vmkt in $lvmkt {
	rename mu_mkt_`vmkt'_d mu_`vmkt'_d
	rename ln_mu_mkt_`vmkt'_d ln_mu_`vmkt'_d
	
	rounding_4dig "mu_`vmkt'_d ln_mu_`vmkt'_d sd_ln_mu_`vmkt'_d"
}

recast int sector

/* REDACTED 
Code labelling different sectors redacted */
label def lab_sector "REDACTED" "MfG" "REDACTED" "Finance" "REDACTED" "Services" "REDACTED" "Util. & Transp." "REDACTED" "Retail" "REDACTED" "Wholesale" "REDACTED" "Construction", replace
lab values sector lab_sector

export excel sector mu_* count_ind using "${xlsx_tab}", sheet("d") sheetreplace firstrow(var) keepcellfmt
export excel sector ln_mu_* count_ind using "${xlsx_tab}", sheet("ln_d") sheetreplace firstrow(var) keepcellfmt 
export excel sector sd_ln_mu_* count_ind using "${xlsx_tab}", sheet("SD ln_d") sheetreplace firstrow(var) keepcellfmt 

//==============================================================================
*Repeat for all industries (i.e. without sector) for every 5 years

clear
save ${ds_temp}_all_5year, replace emptyok

global xlsx_tab "${dir_outf}/mkt_d_all.xlsx"

foreach iyear in 1982 1987 1992 1997 2002 2007 2012 2017 {
	
	global year1 = 1977
	global year2 = `iyear'
	
	do ${dir_do}/mkt_d_ind_0.do "geo"
	drop *_10 *_1 *_10b

	*Dropping decile level measures
	foreach dec in `decile' {
		capture noi drop *_`dec' 
		capture noi drop *_`dec'd
	}
	
	gen year1 = $year1 
	gen year2 = $year2 
	
	*Merge with pre-generated SV weights
	merge 1:1 year1 year2 ch_ind12 using ${dir_out}/sv_weights/sv_weights_5years, keepusing(w_num)
	keep if _merge == 3 
	drop _merge

	foreach vmkt in $lvmkt {
		gen sd_ln_mu_`vmkt'_d = ln_mu_mkt_`vmkt'_d
		gen median_ln_mu_`vmkt'_d = ln_mu_mkt_`vmkt'_d
	}

	collapse (mean) ln_mu_mkt_* (sd) sd_ln_mu* (median) median_ln_mu* [aw = w_num], by(year1 year2)


	foreach vmkt in $lvmkt {
		rename ln_mu_mkt_`vmkt'_d ln_mu_`vmkt'_d
		
		rounding_4dig "sd_ln_mu_`vmkt'_d median_ln_mu_`vmkt'_d ln_mu_`vmkt'_d"
	}

	gen sample = "All Industries"
	
	append using ${ds_temp}_all_5year
	save ${ds_temp}_all_5year, replace
}

use ${ds_temp}_all_5year, clear 
sort year2

export excel year* sample ln_mu_* using "${xlsx_tab}", sheet("Mean ln_d") sheetreplace firstrow(var) keepcellfmt 
export excel year* sample sd_ln_mu_* using "${xlsx_tab}", sheet("SD ln_d") sheetreplace firstrow(var) keepcellfmt 
export excel year* sample median_ln_mu_* using "${xlsx_tab}", sheet("Median ln_d") sheetreplace firstrow(var) keepcellfmt 

//==============================================================================
// 5-year intervals;
// 1) All, 2) MfG, 3) Finance, Services, Retail, Wholesale

clear
save ${ds_temp}_a, replace emptyok

foreach iyear in 1982 1987 1992 1997 2002 2007 2012 2017 {
	global year1 = 1977
	global year2 = `iyear'
	
	clear
	qui do ${dir_do}/mkt_d_ind_0.do "geo"
	keep sector ch_ind12 ln_mu_mkt_*
	gen year1 = ${year1}
	gen year2 = ${year2}
	append using ${ds_temp}_a
	save ${ds_temp}_a, replace
}

use ${ds_temp}_a, clear
merge 1:1 year1 year2 ch_ind12 using ${dir_out}/sv_weights/sv_weights_5years, keepusing(w_num) assert(match) nogen

capture program drop f_sum
program f_sum

sort year1 year2 sector ch_ind12
by year1 year2 sector: egen w_den = total(w_num)
gen w_sv_sect = w_num / w_den
foreach vmkt in $lvmkt {
	replace ln_mu_mkt_`vmkt'_d = ln_mu_mkt_`vmkt'_d * w_sv_sect
}
collapse (sum) ln_mu_mkt_*, by(year1 year2 sector)

end

preserve
replace sector = 0
f_sum
save ${ds_temp}_b, replace
*export to CSV for aggregate change for every 5 years:
drop *_10 *_1 *_10b

*Dropping decile level measures
foreach dec in `decile' {
	capture noi drop *_`dec' 
	capture noi drop *_`dec'd
}
export delimited using ${dir_outf}/agg_d_ln_mkt_per_firm.csv, replace
restore

preserve
/* REDACTED sector code */
keep if sector == "REDACTED"
f_sum
append using ${ds_temp}_b
save ${ds_temp}_b, replace
restore

preserve
keep if inlist(sector,"REDACTED")
replace sector = 2
f_sum
append using ${ds_temp}_b
save ${ds_temp}_b, replace
restore

use ${ds_temp}_b, clear
rounding_4dig "ln_mu_mkt_msa1983_d"

local y = "ln_mu_mkt_msa1983_d"
local y_lab = "d ln(Avg # of MSA1983) (Base=1977)"
local x = "year2"
/* REDACTED
Code specifying sector code redacted */
twoway (connected `y' `x' if sector == "REDACTED") ///
	(connected `y' `x' if sector == "REDACTED") ///
	(connected `y' `x' if sector == "REDACTED"), ///
	ytitle("`y_lab'") xtitle("Year") ///
	legend(order(1 "All" 2 "MfG" 3 "Finance, Services, Util & Transp, Retail, Wholesale") row(1))
graph export "${dir_outf}/Y_`y'_X_`x'.png", replace width(3000) height(2000)
export delimited `y' `x' sector using ${dir_outf}/Y_`y'_X_`x'.csv, replace

local y = "ln_mu_mkt_est_d"
local y_lab = "d ln(Avg # of EST) (Base=1977)"
local x = "year2"
twoway (connected `y' `x' if sector == "REDACTED") ///
	(connected `y' `x' if sector == "REDACTED") ///
	(connected `y' `x' if sector == "REDACTED"), ///
	ytitle("`y_lab'") xtitle("Year") ///
	legend(order(1 "All" 2 "MfG" 3 "Finance, Services, Util & Transp, Retail, Wholesale") row(1))
graph export "${dir_outf}/Y_`y'_X_`x'.png", replace width(3000) height(2000)
export delimited `y' `x' sector using ${dir_outf}/Y_`y'_X_`x'.csv, replace

di "Ended at $S_DATE $S_TIME"
capture log close
// End of do file
